In [1]:
%pylab inline
In [2]:
# Explicit imports instead of `from classy import *`, so a reader can see
# exactly which names come from the classy package. Every name listed here
# is used somewhere below in this notebook.
from classy import (load_excel, extract_features, plot2D,
                    plot_feature_combinations, split, timeit,
                    leaveoneout_cross_validation,
                    NaiveBayes, kNearestNeighbor, Perceptron, BackProp)
In [3]:
# Read the iris spreadsheet; verbose=True makes the loader print a summary.
data = load_excel('data/iris.xls', verbose=True)
In [4]:
# Quick look at what was loaded: the feature-matrix shape, then the labels,
# the label names, and the feature names, in that order.
print(data.vectors.shape)
for attr in ('targets', 'target_names', 'feature_names'):
    print(getattr(data, attr))
Since you can't plot all 4 dimensions at once, try plotting some 2D subsets.
I don't like the automatic placement of the legend, so let's set it manually.
In [5]:
# Pull out just features 0 and 2 for a 2-D scatter plot, and pin the legend
# to the upper left instead of letting it auto-place.
subset = extract_features(data, [0, 2])
plot2D(subset, legend_location='upper left')
Plot all of the pairwise feature combinations.
In [6]:
# Scatter-plot every pairwise feature combination in one figure grid.
plot_feature_combinations(data)
I don't want to do the classification on this subset, so make sure to use the entire data set.
In [7]:
# Naive Bayes classifier; fit below on the full 4-feature data, not the subset.
C=NaiveBayes()
Split the data into test and train subsets...
In [8]:
# Hold out 20% of the samples as a test set; the remaining 80% is for training.
data_train, data_test = split(data, test_size=0.2)
...and then train...
In [9]:
# Reset the stopwatch, train the classifier, and report the elapsed time.
timeit(reset=True)
C.fit(data_train.vectors, data_train.targets)
print("Training time: ", timeit())
In [10]:
# Accuracy on the data the model was fit to vs. the held-out data.
print("On Training Set:", C.percent_correct(data_train.vectors, data_train.targets))
print("On Test Set:", C.percent_correct(data_test.vectors, data_test.targets))
Some classifiers have properties that are useful to look at. Naive Bayes has means and stddevs...
In [11]:
# Feature means learned during the Naive Bayes fit (bare expression → rich display).
C.means
Out[11]:
In [12]:
# Feature standard deviations from the same fit.
C.stddevs
Out[12]:
Leave-one-out cross validation is an alternative to splitting the data, so we use the entire data set.
In [13]:
# Leave-one-out cross-validation over the whole data set: each sample is held
# out once while the model trains on the rest. `result` summarizes the scores.
scores, result = leaveoneout_cross_validation(C, data.vectors, data.targets)
print(result)
In [14]:
# Same drill with k-nearest-neighbor: construct, time the fit, report.
C = kNearestNeighbor()
timeit(reset=True)
C.fit(data_train.vectors, data_train.targets)
print("Training time: ", timeit())
In [15]:
# kNN accuracy on the training split vs. the held-out split.
print("On Training Set:", C.percent_correct(data_train.vectors, data_train.targets))
print("On Test Set:", C.percent_correct(data_test.vectors, data_test.targets))
In [16]:
# Perceptron: construct, time the training, then score on both splits.
C = Perceptron()
timeit(reset=True)
C.fit(data_train.vectors, data_train.targets)
print("Training time: ", timeit())
print("On Training Set:", C.percent_correct(data_train.vectors, data_train.targets))
print("On Test Set:", C.percent_correct(data_test.vectors, data_test.targets))
In [17]:
# Backprop network with one hidden layer of 5 units; same train/score pattern.
C = BackProp(hidden_layer_sizes=[5])
timeit(reset=True)
C.fit(data_train.vectors, data_train.targets)
print("Training time: ", timeit())
print("On Training Set:", C.percent_correct(data_train.vectors, data_train.targets))
print("On Test Set:", C.percent_correct(data_test.vectors, data_test.targets))
In [18]:
# Number of layers in the fitted network — presumably input + hidden + output,
# as in sklearn's MLP; confirm against the classy/BackProp docs.
C.n_layers_
Out[18]:
In [19]:
# Shape of each weight matrix, one per connection between adjacent layers.
shapes = [coef.shape for coef in C.layers_coef_]
shapes
Out[19]: